package com.stylefeng.guns.modular.system.processor;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;

import com.stylefeng.guns.common.persistence.model.CpciaNews;
import com.stylefeng.guns.core.util.DateUtil;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic {@link PageProcessor} that walks the paginated "hyfx" news listing on
 * cpcia.org.cn, follows the article links found there and turns every article page
 * into a {@link CpciaNews} record.
 *
 * <p>Collected records are kept per processor instance, so separate crawls never see
 * each other's results.
 */
public class CpciaNewsProcessor implements PageProcessor {

	/** Regex matching the paginated list pages of the news section. */
	public static final String URL_LIST = "http://www.cpcia\\.org\\.cn/news/hyfx/page_\\d+\\.shtml";

	/** Regex matching single article pages. Only paths starting with {@code 2017-} match. */
	public static final String URL_POST = "http://www.cpcia\\.org\\.cn/news/hyfx/2017-\\d+/\\d+\\.shtml";

	/** First list page; the crawl starts here. */
	private static final String START_URL = "http://www.cpcia.org.cn/news/hyfx/page_1.shtml";

	/** Number of leading characters dropped from the date span markup before parsing. */
	private static final int DATE_PREFIX_LENGTH = 9;

	/** Number of trailing characters dropped from the date span markup before parsing. */
	private static final int DATE_SUFFIX_LENGTH = 7;

	private final Site site = Site.me().setDomain("cpcia.org.cn").setSleepTime(30).setUserAgent(
			"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.65 Safari/537.31");

	/**
	 * Records collected by this instance. Wrapped in a synchronized list because
	 * {@link #process(Page)} may be called from the spider's worker threads while the
	 * result is read from the caller's thread.
	 */
	private final List<CpciaNews> cpciaNewsList = Collections.synchronizedList(new ArrayList<CpciaNews>());

	/**
	 * Runs a blocking crawl starting at the first list page and returns the articles found.
	 *
	 * <p>Every call uses a fresh processor, so results of earlier calls are not repeated
	 * in the returned list.
	 *
	 * @return a new list holding the records collected during this crawl; never {@code null}
	 */
	public List<CpciaNews> getCpciaNews() {
		CpciaNewsProcessor crawler = new CpciaNewsProcessor();
		Spider.create(crawler).addUrl(START_URL).run();
		// Hand out a copy so callers cannot mutate the processor's internal state.
		return new ArrayList<CpciaNews>(crawler.cpciaNewsList);
	}

	/**
	 * Handles one downloaded page. List pages only queue further requests; any other page
	 * is treated as an article and converted into a {@link CpciaNews} record.
	 *
	 * <p>Article pages whose title or date cannot be extracted are skipped.
	 *
	 * @param page the downloaded page
	 */
	@Override
	public void process(Page page) {
		// List page: queue the article links inside the listing block and the other list pages.
		if (page.getUrl().regex(URL_LIST).match()) {
			page.addTargetRequests(page.getHtml().xpath("//div[@class=\"page_list\"]").links().regex(URL_POST).all());
			page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all());
			return;
		}

		// Article page.
		String title = page.getHtml().xpath("//div[@class='article_title']/h1").toString();
		String content = page.getHtml().xpath("//div[@class='article_content']/p").toString();
		String dateMarkup = page.getHtml().xpath("//div[@class='article_source']/span").toString();

		// Without these guards a missing element or unexpectedly short markup ends in a
		// NullPointerException / StringIndexOutOfBoundsException; skip such pages explicitly.
		if (title == null || dateMarkup == null
				|| dateMarkup.length() < DATE_PREFIX_LENGTH + DATE_SUFFIX_LENGTH) {
			return;
		}

		String dateText = dateMarkup.substring(DATE_PREFIX_LENGTH, dateMarkup.length() - DATE_SUFFIX_LENGTH);
		Date newsDate = DateUtil.parseDate(dateText);
		// NOTE(review): assumes DateUtil.parseDate may return null for unparseable text and
		// that such articles should not be stored - confirm against DateUtil.
		if (newsDate == null) {
			return;
		}

		CpciaNews cpciaNews = new CpciaNews();
		// The selected node still carries its <h1> wrapper; strip the tags.
		cpciaNews.setTitle(title.replace("<h1>", "").replace("</h1>", ""));
		cpciaNews.setContent(content);
		cpciaNews.setCreatedate(new Date());
		cpciaNews.setNewsdate(newsDate);
		cpciaNewsList.add(cpciaNews);
	}

	/**
	 * @return the crawl settings (domain, sleep time, user agent) used by the spider
	 */
	@Override
	public Site getSite() {
		return site;
	}

}
